import requests
import re,json,urllib,os
from bs4 import BeautifulSoup
from urllib import request
def get_one_page(url, timeout=10):
    """Fetch *url* with a browser-like User-Agent and return the body text.

    Args:
        url: Address to request with HTTP GET.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely on a stalled connection.

    Returns:
        The decoded response body when the status code is 200, otherwise
        ``None``.

    Raises:
        requests.RequestException: On connection errors or when the timeout
            expires; these are not swallowed here.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36/'}
    response = requests.get(url, headers=headers, timeout=timeout)
    if response.status_code == 200:
        return response.text
    return None
#
def main():
    """Request the first page of Toutiao search results and return its body.

    The query is fixed: keyword ``%E8%A1%97%E6%8B%8D`` (URL-encoded), offset 0,
    20 results, JSON format. The return value is whatever ``get_one_page``
    yields for that URL.
    """
    search_url = (
        'https://www.toutiao.com/api/search/content/'
        '?aid=24&app_name=web_search&offset=0&format=json'
        '&keyword=%E8%A1%97%E6%8B%8D&autoload=true&count=20'
        '&en_qc=1&cur_tab=1&from=search_tab&pd=synthesis'
    )
    return get_one_page(search_url)


def parse_one_page(html):
    """Extract the article URLs from a search-response body.

    Every value found between ``"article_url":"`` and ``","behot_time"`` is
    collected, in order of appearance.

    Args:
        html: Response text to scan. ``None`` or an empty string (e.g. when
            the fetch did not return status 200) is treated as "no results"
            instead of raising ``TypeError`` inside ``re.findall``.

    Returns:
        A list of the captured URL strings; empty when nothing matches.
        The list is also printed to stdout.
    """
    if not html:
        items = []
    else:
        pattern = re.compile(
            '"article_url":"(.*?)","behot_time"', re.S)
        items = pattern.findall(html)

    print(items)
    return items
# Runs at import time: fetch the search results via main() and let
# parse_one_page print the article URLs it finds in them.
parse_one_page(main())
def get_page(items, base_dir='F:/python_work/1/'):
    """Download the images embedded in each article page to disk.

    For every URL in *items* the page is fetched with ``get_one_page``. Image
    URLs (``src&#x3D;&quot;...&quot;``) and the article title
    (``title: '...',``) are extracted with regular expressions, and each
    image is saved as
    ``<base_dir><first 6 chars of title>/<last 19 chars of image URL>.jpg``.

    Pages that could not be fetched or that contain no title are skipped, and
    image responses with a status other than 200 are not written.

    Args:
        items: Iterable of article page URLs.
        base_dir: Directory under which the per-article folders are created.
            Defaults to the location this script originally hard-coded.

    Returns:
        None.
    """
    # Compile once; the patterns do not depend on the page being processed.
    image_re = re.compile('src&#x3D;&quot;(.*?)&quot;', re.S)
    title_re = re.compile('title: \'(.*?)\',', re.S)

    # Paths below are built by concatenation, so make sure the prefix ends
    # with a separator (an empty prefix means "current directory").
    if base_dir and not base_dir.endswith(('/', '\\')):
        base_dir += '/'

    for item in items:
        html = get_one_page(item)
        print(html)
        if not html:
            # get_one_page returns None for non-200 responses.
            continue

        images = image_re.findall(html)
        title_match = title_re.search(html)
        if title_match is None:
            # No title means no folder name can be derived for this article.
            continue
        title = title_match.group(1)

        print(images, title)
        folder = base_dir + title[:6] + '/'
        # makedirs also creates missing parents and tolerates an existing folder.
        os.makedirs(folder, exist_ok=True)

        for image in images:
            # NOTE(review): assumes the last 19 characters of the image URL
            # contain no path separators - confirm against real pages.
            path = folder + image[-19:] + '.jpg'
            print(path)
            url = image + '.jpg'
            r = requests.get(url, timeout=10)
            print(r.status_code)
            if r.status_code != 200:
                # Do not store an error page under a .jpg name.
                continue
            # 'wb' so that re-running overwrites instead of appending a second
            # copy of the bytes to an existing file.
            with open(path, 'wb') as f:
                f.write(r.content)
# Example: get_page(parse_one_page(main()))  # download the images of every article found
